HandsOn Assignments: SOM - Coding¶

Aligned SOMs: Group 14¶

Author: Stefan Minkov & Jacopo Raffaelli Date: 29-01-2025 GitHub Repository: SOS2024_ExSOM_Coding_group14_12407827_12329537


Set Up¶

In [1]:
from SOMToolBox_Parse import SOMToolBox_Parse
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import panel as pn
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')
No description has been provided for this image No description has been provided for this image

The following functions are adaptations of the ones found in the visualization folders.

In [2]:
#HitHistogram
def HitHist(_m, _n, _weights, _idata):
    """Count, per SOM unit, how many input vectors select it as their BMU.

    Returns an (_m, _n) array of hit counts.
    """
    counts = np.zeros(_m * _n)
    for sample in _idata:
        # BMU = unit whose weight vector has minimal Euclidean distance
        bmu = np.argmin(np.linalg.norm(_weights - sample, axis=1))
        counts[bmu] += 1

    return counts.reshape(_m, _n)

#U-Matrix - implementation
def UMatrix(_m, _n, _weights, _dim):
    """Compute the U-Matrix of an _m x _n SOM.

    The grid is expanded to (2*_m - 1) x (2*_n - 1): the inserted
    odd-indexed cells hold the distance between neighboring units, and
    the original (even-indexed) cells are then set to the median of the
    surrounding distances.

    Parameters: _weights has shape (_m * _n, _dim).
    Returns the (2*_m - 1) x (2*_n - 1) scalar U-Matrix.
    """
    U = _weights.reshape(_m, _n, _dim)
    # Insert a zero row/column between every pair of original rows/columns
    U = np.insert(U, np.arange(1, _n), values=0, axis=1)
    U = np.insert(U, np.arange(1, _m), values=0, axis=0)
    #calculate interpolation
    # Only component [0] of each inserted cell is written; the distance is
    # later recovered by summing over the vector axis (other components are 0).
    for i in range(U.shape[0]): 
        if i%2==0:
            # even row: horizontal distance between left and right neighbors
            for j in range(1,U.shape[1],2):
                U[i,j][0] = np.linalg.norm(U[i,j-1] - U[i,j+1], axis=-1)
        else:
            for j in range(U.shape[1]):
                if j%2==0: 
                    # odd row, even column: vertical neighbor distance
                    U[i,j][0] = np.linalg.norm(U[i-1,j] - U[i+1,j], axis=-1)
                else:      
                    # odd row, odd column: average of the two diagonals
                    U[i,j][0] = (np.linalg.norm(U[i-1,j-1] - U[i+1,j+1], axis=-1) + np.linalg.norm(U[i+1,j-1] - U[i-1,j+1], axis=-1))/(2*np.sqrt(2))

    U = np.sum(U, axis=2) #move from Vector to Scalar

    for i in range(0, U.shape[0], 2): #count new values
        for j in range(0, U.shape[1], 2):
            # Replace each original unit's cell with the median of the
            # distance cells around it (fewer neighbors at the borders).
            region = []
            if j>0: region.append(U[i][j-1]) #check left border
            if i>0: region.append(U[i-1][j]) #check bottom
            if j<U.shape[1]-1: region.append(U[i][j+1]) #check right border
            if i<U.shape[0]-1: region.append(U[i+1][j]) #check upper border

            U[i,j] = np.median(region)

    return U

#SDH - implementation
def SDH(_m, _n, _weights, _idata, factor, approach):
    """Smoothed Data Histogram: each input vector contributes to its
    `factor` closest SOM units instead of only its BMU.

    Parameters
    ----------
    _m, _n : int
        Grid rows / columns.
    _weights : np.ndarray
        SOM weights, shape (_m * _n, dim).
    _idata : np.ndarray
        Input vectors.
    factor : int
        Number of best-matching units each vector contributes to.
    approach : int
        0 = normalized rank weighting, 1 = inverse distance,
        2 = min-max-scaled distance.

    Returns
    -------
    np.ndarray
        The smoothed histogram, shape (_m, _n).
    """
    import heapq

    sdh_m = np.zeros(_m * _n)

    # Normalization constant for approach 0: factor + (factor-1) + ... + 1
    cs = factor * (factor + 1) // 2

    for vector in _idata:
        dist = np.sqrt(np.sum(np.power(_weights - vector, 2), axis=1))
        # Indices of the `factor` closest units, nearest first
        c = heapq.nsmallest(factor, range(len(dist)), key=dist.__getitem__)
        if approach == 0:  # normalized rank weighting
            for j in range(factor):
                sdh_m[c[j]] += (factor - j) / cs
        elif approach == 1:  # based on distance
            for j in range(factor):
                sdh_m[c[j]] += 1.0 / dist[c[j]]
        elif approach == 2:  # min-max scaled distance
            dmin, dmax = min(dist[c]), max(dist[c])
            # Guard the degenerate case where all candidate distances are
            # equal (previously produced NaN via 0/0): give full weight.
            if dmax > dmin:
                for j in range(factor):
                    sdh_m[c[j]] += 1.0 - (dist[c[j]] - dmin) / (dmax - dmin)
            else:
                for j in range(factor):
                    sdh_m[c[j]] += 1.0

    return sdh_m.reshape(_m, _n)
In [3]:
#To ensure reproducibility
def setRandomSeed(seed):
    """Seed NumPy's global random number generator for reproducible runs."""
    np.random.seed(seed)

Implemented Functions¶

A-SOM class to represent Layers with¶

The following code defines a class for Aligned SOMs. In this implementation, the proximity to the best matching unit (BMU) is based on Euclidean distance.

In [4]:
"""
    A class to represent an Aligned Self-Organizing Map (SOM).
    Attributes:
    -----------
    m : int
        Number of rows in the SOM grid.
    n : int
        Number of columns in the SOM grid.
    dim : int
        Dimensionality of the input data.
    layer : int, optional
        Layer index of the SOM (default is 0).
    n_iterations : int, optional
        Number of iterations for training (default is 100).
    alpha : float, optional
        Initial learning rate.
    sigma : float, optional
        Initial neighborhood radius.
    p_values : np.ndarray, optional
        Scaling factors for input dimensions (default is np.ones(dim)).
    trained : bool
        Indicates whether the SOM has been trained.
    weightages : np.ndarray
        Weights of the SOM nodes.
    deltas : np.ndarray
        Changes in weights during last training iteration.
    locations : np.ndarray
        Locations of the SOM nodes in the grid.
    """
class AlignedSOM:

    def __init__(self, n, m, dim, layer=0, n_iterations=100, alpha=None, sigma=None, p_values=None):
        # size of the SOM
        self.m = m 
        self.n = n
        # Dimentions of the input data
        self.dim = dim
        # Number of layers
        self.layer = layer
        #Number of iterations for the training process
        self.n_iterations = n_iterations
        #Initial learning rate
        self.alpha = alpha if alpha else 0.3
        #Initial neighbourhood radius
        self.sigma = sigma if sigma else max(m, n) / 2.0
        # Indicated whether the SOM (layer) is trained
        self.trained = False
        # Weights
        self.weightages = None
        # P-values
        self.p_values = p_values if p_values is not None else np.ones(dim)
        # Delta values
        self.deltas = np.zeros((m * n, dim))
        # Neurons
        self.locations = np.array([np.array([i, j]) for i in range(m) for j in range(n)])

    """
    Inializes the weights of a SOM
    """
    def initialize_weights(self, shared_weights):
        self.weightages = shared_weights

    """
    Finds the Best Matching Unit (BMU) for a given input vector.
    """
    def find_bmu(self, input_vect):
        # Scale input vector with p-values, thus regulating which features have more importance in the current layer
        scaled_input = input_vect * self.p_values
        scaled_weights = self.weightages * self.p_values
        distances = np.linalg.norm(scaled_weights - scaled_input, axis=1)
        return np.argmin(distances)
    """
    Trains the SOM with a given input vector for a specific iteration.
    The number of the iteration is used to update the learning rate and neighborhood radius.
    """
    def train(self, input_vect, iter_no):
        bmu_index = self.find_bmu(input_vect)
        bmu_location = self.locations[bmu_index]

        # Update learning rate and neighborhood radius
        learning_rate = self.alpha * (1 - iter_no / self.n_iterations)
        neighborhood_radius = self.sigma * (1 - iter_no / self.n_iterations)

        # Update weights of nodes in the neighborhood of the BMU
        for i, location in enumerate(self.locations):
            distance_to_bmu = np.linalg.norm(location - bmu_location)
            if distance_to_bmu <= neighborhood_radius:
                influence = np.exp(-distance_to_bmu ** 2 / (2 * (neighborhood_radius ** 2)))
                delta = learning_rate * influence * (input_vect - self.weightages[i])
                self.weightages[i] += delta
                self.deltas[i] = delta

        # Sets the SOM as trained
        self.trained = True

    """
    Aligns the SOM weights with given deltas(of another layer) and decay factor
    """
    def align(self, deltas, decay):
        self.weightages += deltas * decay

    """
    Maps input vectors to their corresponding BMUs in the SOM grid.
    Used for visualizing the mappings of input data on the SOM grid.
    """
    def map_vects(self, input_vects):
        if not self.trained:
            raise ValueError("SOM not trained yet")

        mapped = []
        for vect in input_vects:
            bmu_index = self.find_bmu(vect)
            mapped.append(self.locations[bmu_index])
        return mapped

The following function adjusts the weights of other layers based on the current layer's weight changes. This is to ensure alignment across layers.

In [5]:
"""
    Aligns the layers of the SOM based on the current layer's deltas.
    Parameters:
    -----------
    current_layer : AlignedSOM
        The current layer being trained.
    layers : list of AlignedSOM
        List of all SOM layers.
    coef : float, optional
        Coefficient for decay calculation (default is 1.0).
"""
def align_layers(current_layer, layers, coef=1.0):
    for layer in layers:
        if layer != current_layer:
            decay = coef * (2 ** (-abs(current_layer.layer - layer.layer)))
            layer.align(current_layer.deltas, decay)

Helper functions for the main training procedure¶

The following code initializes the p-values given the input dimensions.

In [6]:
"""
    Initializes p-values for scaling input dimensions.
    Does an interpolation, similar as the one shown in (E.Pampalk, 2003)
    Parameters:
    -----------
    dim : int
        Dimensionality of the input data.
    Returns:
    --------
    tuple of np.ndarray
        Two arrays of p-values for the input dimensions.
"""
def init_p_values(dim):
    p0_values = np.zeros(dim, dtype=int)
    p1_values = np.zeros(dim, dtype=int)
    if dim == 1:
        p0_values[0] = 1
        p1_values[0] = 1
    elif dim == 2:
        p0_values[0] = 1
        p1_values[1] = 1
    else:
        p0_values[0] = 1
        p1_values[dim-1] = 1
        p0_values[dim//2] = 1
        p1_values[dim//2] = 1
        for i in range(1, dim//2 + 1):
            p0_values[i] = 1 + (dim//2 - i)
            p1_values[i] = 1
        for i in range(dim//2 + 1, dim - 1):
            p0_values[i] = 1
            p1_values[i] = 1 + (i - dim//2)
    return p0_values, p1_values

The following function extracts the p-values for a given layer.

In [7]:
"""
    Gets the p-values for a specific layer.
    Parameters:
    -----------
    layer : int
        Layer index.
    p0_values : np.ndarray
        Array of p0 values.
    p1_values : np.ndarray
        Array of p1 values.
    p0_indices : list of int
        Indices for p0 values.
    p1_indices : list of int
        Indices for p1 values.
    Returns:
    --------
    np.ndarray
        Array of p-values for the specified layer.
    """
def get_p_values_for_layer(layer, dim, p0_values, p1_values, p0_indices, p1_indices):
    p_values = np.zeros(dim)
    p_values[p0_indices] = p0_values[layer]
    p_values[p1_indices] = p1_values[layer]
    return p_values

Main Training Procedure¶

The following function trains an Aligned SOM using the given parameters. Each layer applies different weightings based on p-values, and the updates to weights are propagated between layers based on the coefficient for alignment decay.

In [8]:
"""
    Trains an Aligned Self-Organizing Map (ASOM) with the given data.
    Parameters:
    -----------
    data : np.ndarray
        Input data for training.
    m : int
        Number of rows in the SOM grid.
    n : int
        Number of columns in the SOM grid.
    dim : int
        Dimensionality of the input data.
    layers : int
        Number of SOM layers.
    iterations : int
        Number of iterations for training.
    p0_values : np.ndarray
        Array of p0 values for scaling input dimensions.
    p1_values : np.ndarray
        Array of p1 values for scaling input dimensions.
    p0_indices : list of int
        Indices for p0 values.
    p1_indices : list of int
        Indices for p1 values.
    alpha : float, optional
        Initial learning rate.
    sigma : float, optional
        Initial neighborhood radius.
    alignment_coef : float, optional
        Coefficient for alignment decay (default is 1.0).
    Returns:
    --------
    list of AlignedSOM
        List of trained SOM layers.
    """
def train_ASOM(data, n, m, dim, layers, iterations, p0_indices, p1_indices, alpha=None, sigma=None, alignment_coef = 1.0):
    shared_weights = np.random.normal(size=(n*m, dim))
    p0_values, p1_values = init_p_values(layers)
    layers = [AlignedSOM(n, m, dim, layer=i, n_iterations=iterations, p_values=get_p_values_for_layer(i, dim, p0_values, p1_values, p0_indices, p1_indices), alpha=alpha, sigma=sigma) for i in range(layers)]
    for layer in layers:
        layer.initialize_weights(shared_weights.copy())

    for iteration in range(iterations):
        current_layer = np.random.choice(layers)
        current_data_point = np.random.choice(len(data))
        current_layer.train(data[current_data_point], iteration)
        align_layers(current_layer, layers, alignment_coef)
    return layers

Visualize all Layers¶

This function visualises datapoints across different layers of the Aligned SOM.

In [9]:
"""
    Visualizes the mappings of input data on the SOM layers.
    Parameters:
    -----------
    layers : list of AlignedSOM
        List of trained SOM layers.
    data : np.ndarray
        Input data to be mapped.
    data_names : list of str, optional
        Names of the input data points (default is None).
    """
def visualize_ASOM_layers(layers, data, data_names=None):
    for layer_index, layer in enumerate(layers):
        mapping = layer.map_vects(data)
        plt.figure()
        plt.title(f"Layer {layer_index} Mapping")
        for i, loc in enumerate(mapping):
            plt.text(loc[0], loc[1], data_names[i] if data_names is not None else i, ha='center', va='center')
        plt.xlim(-1, layer.m + 1)
        plt.ylim(-1, layer.n + 1)
        plt.gca().invert_yaxis()
        plt.show()

Example¶

In [10]:
# The classic "animals" toy dataset: 16 animals described by 13 binary
# attributes each.
animals = np.array(
    [[1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0.],
     [1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
     [1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1.],
     [1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 1.],
     [1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0.],
     [1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0.],
     [0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0.],
     [0., 1., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 0.],
     [0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0.],
     [0., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 0., 0.],
     [1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 0.],
     [0., 0., 1., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0.],
     [0., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 0., 0.],
     [0., 0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0.],
     [0., 0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0.],
     [0., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0.]])
animals_names = ['dove', 'hen', 'duck', 'goose', 'owl', 'hawk', 'eagle', 'fox',
                 'dog', 'wolf', 'cat', 'tiger', 'lion', 'horse', 'zebra', 'cow']


# Shared initialization
dim = 13       # number of attributes per animal
layers = 5     # number of aligned SOM layers
n = 20         # grid columns
m = 30         # grid rows
p0_indices = [i for i in range(9)]      # first 9 features weighted by the p0 ramp
p1_indices = [i for i in range(9, 13)]  # last 4 features weighted by the p1 ramp
iterations = 200
lrm = 0.3      # initial learning rate



# Train the aligned SOM on the animals data and plot each layer's mapping
layers = train_ASOM(
    data = animals,
    n=n,
    m=m,
    dim= dim,
    layers= layers,
    iterations= iterations, 
    p0_indices = p0_indices, 
    p1_indices=p1_indices,
    alpha=lrm
     )
visualize_ASOM_layers(layers, animals, animals_names)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Implementation¶

Initialization of the training parameters

In [11]:
# small SOM
n = 10
m = 10
iterations = 100
# large SOM
n_large = 100
m_large = 60
iterations_large = 500

lrm = 0.3       # initial learning rate
n_layers = 5    # number of aligned SOM layers

10-Clusters¶

Loading the Data¶

In [12]:
# Load the 10-clusters input vectors with the project's SOMToolBox parser.
data_10clusters = SOMToolBox_Parse("datasets/10clusters/10clusters.vec").read_weight_file()

Training a Small and Large SOM¶

We first train a small (10x10) SOM and later a large (100x60) SOM.

Make sure that the SOMs are properly trained, i.e. that the structures to be expected in the SOM become clearly visible by identifying suitable parameters for the initial neighborhood radius and initial learning rate.

In [13]:
setRandomSeed(56)

# Shared initialization
data_array = np.array(data_10clusters['arr'])
dim = data_10clusters['vec_dim']
# Split the feature dimensions in half between the two p-value ramps
p0_indices = [i for i in range(dim//2)]
p1_indices = [i for i in range(dim//2, dim)]

# Train the small (10x10) aligned SOM
layers = train_ASOM(
    data = data_array,
    n = n,
    m = m,
    dim = dim,
    layers = n_layers,
    iterations = iterations,
    p0_indices = p0_indices, 
    p1_indices = p1_indices,
    alpha = lrm
     )

# Train the large (100x60) aligned SOM
layers_large = train_ASOM(
    data = data_array,
    n = n_large,
    m = m_large,
    dim = dim,
    layers = n_layers,
    iterations = iterations_large,
    p0_indices = p0_indices, 
    p1_indices = p1_indices,
    alpha = lrm
     )

The visualization functions are used on the results from the trained SOM (small)

In [14]:
# Render HitHist / U-Matrix / SDH for the final layer of the small SOM
som_weights = layers[-1].weightages

# def HitHist(_m, _n, _weights, _idata):
hithist = hv.Image(HitHist(m, n, som_weights, data_array)).opts(xaxis=None, yaxis=None) 
#def UMatrix(_m, _n, _weights, _dim):
um = hv.Image(UMatrix(m, n, som_weights, dim)).opts(xaxis=None, yaxis=None) 
#def SDH(_m, _n, _weights, _idata, factor, approach):
sdh = hv.Image(SDH(m,n, som_weights, data_array, 25, 0)).opts(xaxis=None, yaxis=None)

hv.Layout([hithist.relabel('HitHist').opts(cmap='kr'), 
           um.relabel('U-Matrix').opts(cmap='jet'), sdh.relabel('SDH').opts(cmap='viridis')]) 
Out[14]:
In [15]:
# Show where each data point lands on every layer of the small SOM
visualize_ASOM_layers(layers=layers, data=data_array, data_names=None)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

The visualization functions are used on the results from the trained SOM (large)

In [16]:
# Render HitHist / U-Matrix / SDH for the final layer of the large SOM
som_weights_large = layers_large[-1].weightages

# def HitHist(_m, _n, _weights, _idata):
hithist = hv.Image(HitHist(m_large, n_large, som_weights_large, data_array)).opts(xaxis=None, yaxis=None) 
#def UMatrix(_m, _n, _weights, _dim):
um = hv.Image(UMatrix(m_large, n_large, som_weights_large, dim)).opts(xaxis=None, yaxis=None) 
#def SDH(_m, _n, _weights, _idata, factor, approach):
sdh = hv.Image(SDH(m_large,n_large, som_weights_large, data_array, 25, 0)).opts(xaxis=None, yaxis=None)

hv.Layout([hithist.relabel('HitHist').opts(cmap='kr'), 
           um.relabel('U-Matrix').opts(cmap='jet'), sdh.relabel('SDH').opts(cmap='viridis')]) 
Out[16]:
In [17]:
# Show where each data point lands on every layer of the large SOM
visualize_ASOM_layers(layers=layers_large, data=data_array, data_names=None)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Visualizations with different Parameters¶

We now train SOMs with alternating values for alpha and sigma, to see how a difference in initialization influences the outcome.

In [18]:
# Parameter grid for the sensitivity experiment (learning rate x radius)
alpha_low = 0.3
alpha_high = 0.5
sigma_small = 1.5
sigma_large = 5

parameter_sets = [
    (alpha_low, sigma_large, "Alpha = 0.3, Sigma = 5"),
    (alpha_low, sigma_small, "Alpha = 0.3, Sigma = 1.5"),
    (alpha_high, sigma_large, "Alpha = 0.5, Sigma = 5"),
    (alpha_high, sigma_small, "Alpha = 0.5, Sigma = 1.5")
]
In [19]:
# Train one aligned SOM per (alpha, sigma) configuration, then render
# HitHist / U-Matrix / SDH for the final layer of each.
trained_layers_list = []

for alpha, sigma, title in parameter_sets:
    layers = train_ASOM(
        data=data_array,
        n=n,
        m=m,
        dim=dim,
        layers=n_layers,
        iterations=iterations,
        p0_indices=p0_indices,
        p1_indices=p1_indices,
        alpha=alpha,
        sigma=sigma
    )
    trained_layers_list.append(layers)

# Create a list to store the rows
rows = []

# Iterate over each parameter set and create the plots
for param_index, trained_layers in enumerate(trained_layers_list):
    # Get the weights and dimensions from the trained SOM
    som_weights = trained_layers[-1].weightages  # Final layer weights
    
    # HitHistogram
    # NOTE(review): the cmaps set here are overridden by .opts(cmap=...) below
    hit_hist = HitHist(m, n, som_weights, data_array)
    hithist = hv.Image(hit_hist).opts(title=f"HitHistogram - {parameter_sets[param_index][2]}", cmap='Blues', xaxis=None, yaxis=None, fontsize={'title': 10})

    # U-Matrix
    u_matrix = UMatrix(m, n, som_weights, dim)
    um = hv.Image(u_matrix).opts(title=f"U-Matrix - {parameter_sets[param_index][2]}", cmap='inferno', xaxis=None, yaxis=None, fontsize={'title': 10})

    # SDH
    sdh = SDH(m, n, som_weights, data_array, factor=5, approach=0)
    sdh_plot = hv.Image(sdh).opts(title=f"SDH - {parameter_sets[param_index][2]}", cmap='viridis', xaxis=None, yaxis=None, fontsize={'title': 10})

    # Create a layout for this parameter set (with 3 plots in a row)
    row = hv.Layout([hithist.opts(cmap='kr'), um.opts(cmap='jet'), sdh_plot.opts(cmap='viridis')]).cols(3)  # Use .cols(3) to arrange them in a single row
    rows.append(row)  # Add this row to the list

# Combine all rows into a single layout (each row will be displayed on a new line)
pn.Column(*rows).servable()
Out[19]:

Visualizations of a pretrained SOM¶

In [20]:
# Load input vectors and pretrained weights for the 10-clusters SOM.
idata = SOMToolBox_Parse("datasets/10clusters/10clusters.vec").read_weight_file()
weights = SOMToolBox_Parse("datasets/10clusters/10clusters.wgt.gz").read_weight_file()
# NOTE(review): `reshaped_weights` is never used below — dead assignment.
reshaped_weights = weights['arr'].reshape(10, 10, 10)  # 10x10 grid, with 10 features per weight

# NOTE(review): 'ydim' is passed for both grid dimensions; harmless here
# because this pretrained map is square (10x10), but presumably one of
# them should be 'xdim' — confirm against the .wgt file.
# def HitHist(_m, _n, _weights, _idata):
hithist = hv.Image(HitHist(weights['ydim'], weights['ydim'], weights['arr'], idata['arr'])).opts(xaxis=None, yaxis=None) 
#def UMatrix(_m, _n, _weights, _dim):
um = hv.Image(UMatrix(weights['ydim'], weights['ydim'], weights['arr'], idata['vec_dim'])).opts(xaxis=None, yaxis=None) 
#def SDH(_m, _n, _weights, _idata, factor, approach):
sdh = hv.Image(SDH(weights['ydim'], weights['ydim'], weights['arr'], idata['arr'], 25, 0)).opts(xaxis=None, yaxis=None)

hv.Layout([hithist.relabel('HitHist').opts(cmap='kr'), 
           um.relabel('U-Matrix').opts(cmap='jet'), sdh.relabel('SDH').opts(cmap='viridis')])   
Out[20]:

As we can see from the above plots, the visualizations generated by our SOMs do not show the same information as the ones generated by the pretrained SOMs. This could be due to a variety of factors. For example, the way the distance function is evaluated could differ between the two implementations, which could lead to the vast difference. Alternatively, the choice of parameters may not have been optimal. This is less likely, though, because a variety of initializations were tested, with none producing results similar to the ones above.

Chainlink¶

Loading the Data¶

In [21]:
# Load the chainlink input vectors with the project's SOMToolBox parser.
data_chainlink = SOMToolBox_Parse("datasets/chainlink/chainlink.vec").read_weight_file()

Training a Small and Large SOM¶

We first train a small (10x10) SOM and later a large (100x60) SOM.

Make sure that the SOMs are properly trained, i.e. that the structures to be expected in the SOM become clearly visible by identifying suitable parameters for the initial neighborhood radius and initial learning rate.

In [22]:
setRandomSeed(56)

# Shared initialization
data_array_cl = np.array(data_chainlink['arr'])
dim_cl = data_chainlink['vec_dim']
# Split the feature dimensions in half between the two p-value ramps
p0_indices_cl = [i for i in range(dim_cl//2)]
p1_indices_cl = [i for i in range(dim_cl//2, dim_cl)]

# Train the small (10x10) aligned SOM on the chainlink data
layers_cl = train_ASOM(
    data = data_array_cl,
    n = n,
    m = m,
    dim = dim_cl,
    layers = n_layers,
    iterations = iterations,
    p0_indices = p0_indices_cl, 
    p1_indices = p1_indices_cl,
    alpha = lrm
     )

# Train the large (100x60) aligned SOM on the chainlink data
layers_large_cl = train_ASOM(
    data = data_array_cl,
    n = n_large,
    m = m_large,
    dim = dim_cl,
    layers = n_layers,
    iterations = iterations_large,
    p0_indices = p0_indices_cl, 
    p1_indices = p1_indices_cl,
    alpha = lrm
     )
In [23]:
# Render HitHist / U-Matrix / SDH for the final layer of the small chainlink SOM
som_weights_cl = layers_cl[-1].weightages

# def HitHist(_m, _n, _weights, _idata):
hithist = hv.Image(HitHist(m, n, som_weights_cl, data_array_cl)).opts(xaxis=None, yaxis=None) 
#def UMatrix(_m, _n, _weights, _dim):
um = hv.Image(UMatrix(m, n, som_weights_cl, dim_cl)).opts(xaxis=None, yaxis=None) 
#def SDH(_m, _n, _weights, _idata, factor, approach):
sdh = hv.Image(SDH(m,n, som_weights_cl, data_array_cl, 25, 0)).opts(xaxis=None, yaxis=None)

hv.Layout([hithist.relabel('HitHist').opts(cmap='kr'), 
           um.relabel('U-Matrix').opts(cmap='jet'), sdh.relabel('SDH').opts(cmap='viridis')]) 
Out[23]:
In [24]:
# Show where each chainlink data point lands on every layer of the small SOM
visualize_ASOM_layers(layers=layers_cl, data=data_array_cl, data_names=None)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [25]:
# Render HitHist / U-Matrix / SDH for the final layer of the large chainlink SOM
som_weights_large_cl = layers_large_cl[-1].weightages

# def HitHist(_m, _n, _weights, _idata):
hithist = hv.Image(HitHist(m_large, n_large, som_weights_large_cl, data_array_cl)).opts(xaxis=None, yaxis=None) 
#def UMatrix(_m, _n, _weights, _dim):
um = hv.Image(UMatrix(m_large, n_large, som_weights_large_cl, dim_cl)).opts(xaxis=None, yaxis=None) 
#def SDH(_m, _n, _weights, _idata, factor, approach):
sdh = hv.Image(SDH(m_large,n_large, som_weights_large_cl, data_array_cl, 25, 0)).opts(xaxis=None, yaxis=None)

hv.Layout([hithist.relabel('HitHist').opts(cmap='kr'), 
           um.relabel('U-Matrix').opts(cmap='jet'), sdh.relabel('SDH').opts(cmap='viridis')]) 
Out[25]:
In [26]:
# Show where each chainlink data point lands on every layer of the large SOM
visualize_ASOM_layers(layers=layers_large_cl, data=data_array_cl, data_names=None)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Visualizations with different Parameters¶

We now train SOMs with alternating values for alpha and sigma, to see how a difference in initialization influences the outcome.

In [27]:
# Parameter grid for the chainlink sensitivity experiment (same as before)
alpha_low = 0.3
alpha_high = 0.5
sigma_small = 1.5
sigma_large = 5

parameter_sets = [
    (alpha_low, sigma_large, "Alpha = 0.3, Sigma = 5"),
    (alpha_low, sigma_small, "Alpha = 0.3, Sigma = 1.5"),
    (alpha_high, sigma_large, "Alpha = 0.5, Sigma = 5"),
    (alpha_high, sigma_small, "Alpha = 0.5, Sigma = 1.5")
]
In [28]:
# Train one aligned SOM per (alpha, sigma) configuration on the chainlink
# data, then render HitHist / U-Matrix / SDH for the final layer of each.
trained_layers_list_cl = []

for alpha, sigma, title in parameter_sets:
    layers = train_ASOM(
        data=data_array_cl,
        n=n,
        m=m,
        dim=dim_cl,
        layers=n_layers,
        iterations=iterations,
        p0_indices=p0_indices_cl,
        p1_indices=p1_indices_cl,
        alpha=alpha,
        sigma=sigma
    )
    trained_layers_list_cl.append(layers)

# Create a list to store the rows
rows = []

# Iterate over each parameter set and create the plots
for param_index, trained_layers in enumerate(trained_layers_list_cl):
    # Get the weights and dimensions from the trained SOM
    som_weights_cl = trained_layers[-1].weightages  # Final layer weights
    
    # HitHistogram
    # NOTE(review): the cmaps set here are overridden by .opts(cmap=...) below
    hit_hist = HitHist(m, n, som_weights_cl, data_array_cl)
    hithist = hv.Image(hit_hist).opts(title=f"HitHistogram - {parameter_sets[param_index][2]}", cmap='Blues', xaxis=None, yaxis=None, fontsize={'title': 10})

    # U-Matrix
    u_matrix = UMatrix(m, n, som_weights_cl, dim_cl)
    um = hv.Image(u_matrix).opts(title=f"U-Matrix - {parameter_sets[param_index][2]}", cmap='inferno', xaxis=None, yaxis=None, fontsize={'title': 10})

    # SDH
    sdh = SDH(m, n, som_weights_cl, data_array_cl, factor=5, approach=0)
    sdh_plot = hv.Image(sdh).opts(title=f"SDH - {parameter_sets[param_index][2]}", cmap='viridis', xaxis=None, yaxis=None, fontsize={'title': 10})

    # Create a layout for this parameter set (with 3 plots in a row)
    row = hv.Layout([hithist.opts(cmap='kr'), um.opts(cmap='jet'), sdh_plot.opts(cmap='viridis')]).cols(3)  # Use .cols(3) to arrange them in a single row
    rows.append(row)  # Add this row to the list

# Combine all rows into a single layout (each row will be displayed on a new line)
pn.Column(*rows).servable()
Out[28]:

Visualizations of a pretrained SOM¶

In [29]:
# Load input vectors and pretrained weights for the chainlink SOM.
idata_cl = SOMToolBox_Parse("datasets/chainlink/chainlink.vec").read_weight_file()
weights_cl = SOMToolBox_Parse("datasets/chainlink/chainlink.wgt.gz").read_weight_file()

# NOTE(review): 'ydim' is passed for both grid dimensions; for a non-square
# pretrained map, presumably one of them should be 'xdim' — confirm.
# def HitHist(_m, _n, _weights, _idata):
hithist = hv.Image(HitHist(weights_cl['ydim'], weights_cl['ydim'], weights_cl['arr'], idata_cl['arr'])).opts(xaxis=None, yaxis=None) 
#def UMatrix(_m, _n, _weights, _dim):
um = hv.Image(UMatrix(weights_cl['ydim'], weights_cl['ydim'], weights_cl['arr'], 2)).opts(xaxis=None, yaxis=None) 
#def SDH(_m, _n, _weights, _idata, factor, approach):
# Fixed copy-paste bug: this call previously used `weights['arr']` and
# `idata['arr']` (the 10-clusters data) instead of the chainlink variables.
sdh = hv.Image(SDH(weights_cl['ydim'], weights_cl['ydim'], weights_cl['arr'], idata_cl['arr'], 25, 0)).opts(xaxis=None, yaxis=None)

hv.Layout([hithist.relabel('HitHist').opts(cmap='kr'), 
           um.relabel('U-Matrix').opts(cmap='jet'), sdh.relabel('SDH').opts(cmap='viridis')])   
Out[29]:

With the Chainlink dataset, we obtain similar levels of similarity as with the 10-Clusters dataset, since the visualizations obtained from the SOM we trained and the pre-trained one differ drastically. Therefore, the same observations made before apply here. Additionally, there is a chance that we are not displaying the visualizations for the pre-trained SOM correctly, since the code produced quite a few errors. This was due to the idata vec_dim value, which, although it is supposed to be accurate, gave errors when producing the U-Matrix. We set the value equal to 2, since given the shape of the data it was the only value that made sense, but there is a risk that this is a mistake.